package com.shengzai.rdd

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates `groupByKey` on a pair RDD.
 *
 * Reads a comma-separated text file, turns every line into a
 * `(last field, second field)` pair, groups the second fields by the last
 * field and prints each group.
 */
object Demo8GroupByKey {

  /**
   * Program entry point.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {

    val conf = new SparkConf()
    conf.setMaster("local")
    // NOTE(review): the app name looks copied from another demo; it is kept
    // unchanged here because it is a runtime-visible string.
    conf.setAppName("Filter")
    val sc = new SparkContext(conf)

    // Always stop the context, even if the job fails, so that the resources
    // held by Spark are released.
    try {
      val stuRDD: RDD[String] = sc.textFile("hadoop_code/src/data/students.txt")

      // Key each record by its last field and keep the second field as the
      // value. Lines with fewer than two fields (blank or malformed lines)
      // are skipped, because indexing field 1 on them would throw and abort
      // the whole job.
      // NOTE(review): presumably the last field is the class and the second
      // field is the student name -- confirm against the data file.
      val mapRDD: RDD[(String, String)] = stuRDD
        .map(line => line.split(","))
        .filter(fields => fields.length >= 2)
        .map(fields => (fields.last, fields(1)))

      // groupByKey is only available on RDDs of key-value pairs.
      val groupByKeyRDD: RDD[(String, Iterable[String])] = mapRDD.groupByKey()

      groupByKeyRDD.foreach(println)
    } finally {
      sc.stop()
    }

  }

}
